load libraries

library("tidyverse")
library("plyr")
library("dplyr")
library("ggplot2")
library("scales")
library("ggpubr")
library("gridExtra")
library("grid")
library("GGally")
library("vcfR") # for extracting genotype data from a vcf file
library("data.table")
library("stringr")
library("janitor")
library("plotly") # for the 3d plots
library("ggfortify")
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE,
                     fig.width = 10,
                      fig.asp = 0.6,
                      out.width = "100%")

Load Variant Call Format (VCF) file.

Extract genotypes for each site and individual. The metadata for all samples can be found in here.

vcf <- read.vcfR("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/vcf_filter/Q40BIALLDP16HDP40mis.5Chr7/Q40BIALLDP16HDP40mis.5Chr7.recode.vcf", verbose = FALSE )
vcf
## ***** Object of Class vcfR *****
## 223 samples
## 7 CHROMs
## 35,169 variants
## Object size: 293.5 Mb
## 0 percent missing data
## *****        *****         *****
# extract the genotype for each site in each individual
gt <- extract.gt(vcf, element = "GT") 
gt <- as.data.frame(t(gt)) %>%
  rownames_to_column("ID")
#clean the ID names
gt$ID <- sub("_[^_]+$", "", gt$ID)

table <-  gt %>% 
  t() %>%
  as.data.frame() %>%
  row_to_names(row_number = 1) 

read the meta data table and set the family

meta <- read_csv("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/meta_more.csv")

# set the families 
family = grep("fnd",gt$ID, value=TRUE) %>%
  str_extract("[^_]+")  %>%
  unique()

Control: Homozygotic sites (crosses 1-4)

(1) F1 male (0/0) x female (0/0)

we expect all F2 offspring to be homozygotic (0/0) like their parents (F1)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/0")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample      total homo_…¹  hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>       <dbl>   <dbl>   <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn   2540   0.794 0.206   3.94e-4     63 Nymph         2 Nymph no adu…
## 2 63_64b_grn   2382   0.788 0.211   8.40e-4     63 Nymph         2 Nymph no adu…
## 3 63_64c_grn   2452   0.799 0.200   8.16e-4     63 Egg           2 Egg   no adu…
## 4 63_64d_grn   2432   0.980 0.0201  0           63 Egg           2 Egg   no adu…
## 5 600_601a_g…  1163   0.988 0.0120  0          600 Adult         2 male  no adu…
## 6 600_601b_g…  1333   0.990 0.00975 0          600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% 
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (0/0) x female (0/0)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (0/0) x female (0/0)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1)) 

(2) F1 male (1/1) x female (1/1)

we expect all F2 offspring to be homozygotic (1/1) like their parents (F1)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "1/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
  mutate(arcsin=asin(sqrt(freq))) %>%
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample      total homo_…¹  hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>       <dbl>   <dbl>   <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn   1629 1.23e-3 0.178     0.821     63 Nymph         2 Nymph no adu…
## 2 63_64b_grn   1550 6.45e-4 0.188     0.812     63 Nymph         2 Nymph no adu…
## 3 63_64c_grn   1535 6.51e-4 0.180     0.819     63 Egg           2 Egg   no adu…
## 4 63_64d_grn   1537 6.51e-4 0.0143    0.985     63 Egg           2 Egg   no adu…
## 5 600_601a_g…   643 0       0.0218    0.978    600 Adult         2 male  no adu…
## 6 600_601b_g…   778 0       0.00514   0.995    600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% #dplyr::filter(sex == "male") %>%
#  filter(!sample =="502_503d_grn") %>%
#    filter(!sample =="43_44c_grn") %>%
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("yes"="circle", "no"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (1/1) x female (1/1)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (1/1) x female (1/1)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1))

Crossing homozygotic male, with heterozygotic female

(5) F1 male (0/0) x female (0/1)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/0")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

# 3d
data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample       total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>        <dbl>   <dbl>  <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn    3511 0.277    0.560 0.163       63 Nymph         2 Nymph no adu…
## 2 63_64b_grn    3230 0.236    0.549 0.215       63 Nymph         2 Nymph no adu…
## 3 63_64c_grn    3248 0.248    0.579 0.173       63 Egg           2 Egg   no adu…
## 4 63_64d_grn    3259 0.0172   0.977 0.00583     63 Egg           2 Egg   no adu…
## 5 600_601a_gr…   728 0.0110   0.988 0.00137    600 Adult         2 male  no adu…
## 6 600_601b_grn   861 0.00929  0.990 0.00116    600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% #dplyr::filter(sex == "male") %>%
#  filter(!sample =="502_503d_grn") %>%
#    filter(!sample =="43_44c_grn") %>%
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (0/0) x female (0/1)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (0/0) x female (0/1)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1))

(6) F1 male (1/1) x female (0/1)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "1/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample       total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>        <dbl>   <dbl>  <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn    2383 0.207    0.595 0.198       63 Nymph         2 Nymph no adu…
## 2 63_64b_grn    2222 0.234    0.482 0.284       63 Nymph         2 Nymph no adu…
## 3 63_64c_grn    2256 0.218    0.590 0.193       63 Egg           2 Egg   no adu…
## 4 63_64d_grn    2271 0.00705  0.985 0.00749     63 Egg           2 Egg   no adu…
## 5 600_601a_gr…   624 0.00321  0.997 0          600 Adult         2 male  no adu…
## 6 600_601b_grn   758 0        0.996 0.00396    600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% 
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (1/1) x female (0/1)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (1/1) x female (0/1)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1))

Crossing heterozygotic male, with homozygotic female

(7) F1 male (0/1) x female (0/0)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample       total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>        <dbl>   <dbl>  <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn      49   0.531 0.449   0.0204     63 Nymph         2 Nymph no adu…
## 2 63_64b_grn      42   0.5   0.5     0          63 Nymph         2 Nymph no adu…
## 3 63_64c_grn      44   0.432 0.568   0          63 Egg           2 Egg   no adu…
## 4 63_64d_grn      47   0.681 0.298   0.0213     63 Egg           2 Egg   no adu…
## 5 600_601a_gr…   165   0.952 0.0485  0         600 Adult         2 male  no adu…
## 6 600_601b_grn   179   0.944 0.0559  0         600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% 
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (0/1) x female (0/0)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (1/1) x female (0/1)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1))

(8) F1 male (0/1) x female (1/1)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample       total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>        <dbl>   <dbl>  <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn       3       0 0.333    0.667     63 Nymph         2 Nymph no adu…
## 2 63_64b_grn       3       0 0.333    0.667     63 Nymph         2 Nymph no adu…
## 3 63_64c_grn       3       0 0        1         63 Egg           2 Egg   no adu…
## 4 63_64d_grn       2       0 0.5      0.5       63 Egg           2 Egg   no adu…
## 5 600_601a_gr…   129       0 0.0155   0.984    600 Adult         2 male  no adu…
## 6 600_601b_grn   160       0 0.0188   0.981    600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% 
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (0/1) x female (1/1)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (0/1) x female (1/1)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1))

Crossing heterozygotic male and female

(9) F1 male (0/1) x female (0/1)

# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
        dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
#  mutate(total = as.numeric(sum(n))) %>%
  dplyr::rename(obs = n)
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes

data <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>% 
    pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
    left_join(meta, by="sample") %>%
    rename(homo_ref="0/0") %>%
      rename(hetero="0/1") %>%
    rename(homo_alt="1/1")

head(data)
## # A tibble: 6 × 10
## # Groups:   sample [6]
##   sample       total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex   adult…⁵
##   <chr>        <dbl>   <dbl>  <dbl>   <dbl>  <dbl> <chr>     <dbl> <chr> <chr>  
## 1 63_64a_grn      62  0.258   0.710 0.0323      63 Nymph         2 Nymph no adu…
## 2 63_64b_grn      65  0.323   0.677 0           63 Nymph         2 Nymph no adu…
## 3 63_64c_grn      61  0.262   0.705 0.0328      63 Egg           2 Egg   no adu…
## 4 63_64d_grn      65  0.308   0.677 0.0154      63 Egg           2 Egg   no adu…
## 5 600_601a_gr…   459  0       0.993 0.00654    600 Adult         2 male  no adu…
## 6 600_601b_grn   560  0.0125  0.980 0.00714    600 Egg           2 Egg   no adu…
## # … with abbreviated variable names ¹​homo_ref, ²​homo_alt, ³​dev_stage,
## #   ⁴​generation, ⁵​adult_sisters
data %>% 
  plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
          color = ~sex, 
          colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
          symbol= ~adult_sisters,
          symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
          text=~sample,
          hoverinfo= "text") %>%
  add_markers() %>%
  layout(title = 'F2 offspring of F1 male (0/1) x female (0/1)',
        scene = list(
        xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
        yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
        zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%  
    replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists 
    group_by(sample) %>%
    mutate(total = as.numeric(sum(obs))) %>%
    mutate(freq=obs/total) %>%
  left_join(meta, by="sample")

p <- df %>% 
  filter(total>10) %>%
  ggplot(aes(y =freq, x = sex, color = gt, 
              shape = adult_sisters,
              text = paste("sample:", sample,
                          ", n:", total))) +
  geom_jitter(width=0.1, size=2) +
  scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
    theme_classic()  +
  ggtitle('F2 offspring of F1 male (0/1) x female (0/1)') +
  guides(color = "none") +
facet_wrap( ~gt)

ggplotly(p) %>% 
  layout(legend = list(orientation = "h", x = 0, y = -0.1))